{
 "metadata": {
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.7-final"
  },
  "orig_nbformat": 2,
  "kernelspec": {
   "name": "Python 3.6.7 64-bit ('DataAnalysis': conda)",
   "display_name": "Python 3.6.7 64-bit ('DataAnalysis': conda)",
   "metadata": {
    "interpreter": {
     "hash": "07829f70181eff8a51ba0c91cec5a602c5da58bbdadabc96e22cfe4f56b0b078"
    }
   }
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2,
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "output_type": "stream",
     "name": "stdout",
     "text": [
      "*** Introductory Examples for the NLTK Book ***\n",
      "Loading text1, ..., text9 and sent1, ..., sent9\n",
      "Type the name of the text or sentence to view it.\n",
      "Type: 'texts()' or 'sents()' to list the materials.\n",
      "text1: Moby Dick by Herman Melville 1851\n",
      "text2: Sense and Sensibility by Jane Austen 1811\n",
      "text3: The Book of Genesis\n",
      "text4: Inaugural Address Corpus\n",
      "text5: Chat Corpus\n",
      "text6: Monty Python and the Holy Grail\n",
      "text7: Wall Street Journal\n",
      "text8: Personals Corpus\n",
      "text9: The Man Who Was Thursday by G . K . Chesterton 1908\n"
     ]
    }
   ],
   "source": [
    "from tools import *\n",
    "from nltk.book import *\n",
    "%matplotlib inline"
   ]
  },
  {
   "source": [
    "# Chap1 语言处理与Python\n",
    "\n",
    "目的：\n",
    "\n",
    "1.  简单的程序如何与大规模的文本结合？\n",
    "2.  如何自动地提取出关键字和词组？如何使用它们来总结文本的风格和内容？\n",
    "3.  Python为文本处理提供了哪些工具和技术？\n",
    "4.  自然语言处理中还有哪些有趣的挑战？"
   ],
   "cell_type": "markdown",
   "metadata": {}
  },
  {
   "source": [
    "## 1.3 计算语言：简单的统计"
   ],
   "cell_type": "markdown",
   "metadata": {}
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "output_type": "stream",
     "name": "stdout",
     "text": [
      "sorted(saying)=  ['After', 'all', 'and', 'done', 'done', 'is', 'is', 'more', 'said', 'said', 'than']\nset(saying)=  {'and', 'After', 'is', 'said', 'all', 'than', 'more', 'done'}\ntokens= sorted(set(saying))=  ['After', 'all', 'and', 'done', 'is', 'more', 'said', 'than']\ntokens[-3:]=  ['more', 'said', 'than']\n"
     ]
    }
   ],
   "source": [
    "saying = ['After', 'all', 'is', 'said', 'and', 'done', 'more', 'is', 'said', 'than', 'done']\n",
    "print(\"sorted(saying)= \", sorted(saying))\n",
    "tokens=set(saying)\n",
    "print(\"set(saying)= \", tokens)\n",
    "tokens=sorted(tokens)\n",
    "print(\"tokens= sorted(set(saying))= \", tokens)\n",
    "print(\"tokens[-3:]= \", tokens[-3:])"
   ]
  },
  {
   "source": [
    "### 1.3.1 频率分布"
   ],
   "cell_type": "markdown",
   "metadata": {}
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "output_type": "stream",
     "name": "stdout",
     "text": [
      "FreqDist(text1)=  <FreqDist with 19317 samples and 260819 outcomes>\n"
     ]
    },
    {
     "output_type": "execute_result",
     "data": {
      "text/plain": [
       "FreqDist({',': 18713, 'the': 13721, '.': 6862, 'of': 6536, 'and': 6024, 'a': 4569, 'to': 4542, ';': 4072, 'in': 3916, 'that': 2982, ...})"
      ]
     },
     "metadata": {},
     "execution_count": 3
    }
   ],
   "source": [
    "fdist1=FreqDist(text1)\n",
    "print(\"FreqDist(text1)= \", fdist1)\n",
    "fdist1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "output_type": "stream",
     "name": "stdout",
     "text": [
      "fdist1.most_common(50)=  [(',', 18713), ('the', 13721), ('.', 6862), ('of', 6536), ('and', 6024), ('a', 4569), ('to', 4542), (';', 4072), ('in', 3916), ('that', 2982), (\"'\", 2684), ('-', 2552), ('his', 2459), ('it', 2209), ('I', 2124), ('s', 1739), ('is', 1695), ('he', 1661), ('with', 1659), ('was', 1632), ('as', 1620), ('\"', 1478), ('all', 1462), ('for', 1414), ('this', 1280), ('!', 1269), ('at', 1231), ('by', 1137), ('but', 1113), ('not', 1103), ('--', 1070), ('him', 1058), ('from', 1052), ('be', 1030), ('on', 1005), ('so', 918), ('whale', 906), ('one', 889), ('you', 841), ('had', 767), ('have', 760), ('there', 715), ('But', 705), ('or', 697), ('were', 680), ('now', 646), ('which', 640), ('?', 637), ('me', 627), ('like', 624)]\n"
     ]
    }
   ],
   "source": [
    "# 取出现频率最高的 50 个词（NLTK 3 中用 most_common() 取代旧版对词汇表的切片）\n",
    "print(\"fdist1.most_common(50)= \",fdist1.most_common(50))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "output_type": "stream",
     "name": "stdout",
     "text": [
      "fdist1['whale']=  906\n"
     ]
    }
   ],
   "source": [
    "# FreqDist 的索引\n",
    "print(\"fdist1['whale']= \",fdist1['whale'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "output_type": "display_data",
     "data": {
      "text/plain": "<Figure size 432x288 with 1 Axes>",
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAZUAAAEbCAYAAAAS4RmTAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvhp/UCwAAIABJREFUeJzt3Xd4HNW5+PHvK8myLDfZsrEtdxubYoNtJBswxjYOPZiQUEIIoQZDyI+Qm3AvEBICBMhNAgmQS0IJJQW4FwMBTHcvmCa54N57lSxX9fL+/jiz9mq9K+2utsjS+3mefXZ3ZnbOOzu7++6cc+aMqCrGGGNMLKQkOwBjjDHNhyUVY4wxMWNJxRhjTMxYUjHGGBMzllSMMcbEjCUVY4wxMWNJxRhjTMxYUjHGGBMzllSMMcbEjCUVY4wxMZOW7AASrUuXLtqvX7+oXltWVkabNm0imhfv6c2lDCs78WUnooyWWnYiyohl2eEoKCgoUtWuDS6oqi3qlpubq9HKz8+PeF68pzeXMqzs5llGSy07EWXEsuxwAPkaxm+sVX8ZY4yJGUsqxhhjYsaSijHGmJixpGKMMSZmLKkYY4yJGUsqxhhjYqbFnadijDEtSdGhCpZu28+y7Qc4UFhKbm58y7OkYowxzYCqsm1fGUu3HWDa0oM8veQrlm3fz64DFYeXGZCVxr1xjiMuSUVEMoAfAncBD6jqyyJyife8PfChqv5SRFKBp4ChQCvgVlVdIiKjgMe8aZ+q6l2RLBuPbTLGmKaitlbZdrCarYu2sXz7AZZu38/SbQfYX1blt1QJAO1ap3Fyjw6cnNOBrJq9cY8tXkcq3YAK4BW/aV8D5wG1wAYReRw4H0hT1XEiMgHwTXsemKiqm0VkjoiMBnqHu6yqzo/TdhljTELV1Cqrdh5k6bb9XvLYz/LtByiprAGK6izbuW06Q3I60CW1jG+cNpihOR3p0zmTlBQBoKCgIO7xxiWpqOom4HkRecBv2mYAEckCyoFDwGjgfRHJA+4EThCRTkANsENEfgekAyOBAREsa0nFGHPMqaqpZc2uQ4cTyJJt+1m2bR+VNbuOWja7TQoj+nVlaM8ODM3pyJCeHejeIQMRoaCggNxTc5KwBSBuSJc4rdwllY2q+rL3vC0wGfijqk4Tkadw1WFrgT8Ay4FRwDxgLvAMMAbXS21guMuq6pMBcUwCJgH06NEjd8qUKVFtT2lpKZmZmRHNi/f05lKGlZ34shNRRkstO5zXVNUqW/ZXs25vFev3VrF2TwVbDtZSVXv0uo5rm8qArDQGdmpF/06tGJCVRqvaiphtXzjy8vIKVDWvwQXDGSAs2hvwAHCD97gL8Akwzm/+dcCL3uMzgPe8x2uBfoAAH+KOPsJetr6YbEDJplmGld08y2ipZQfOq6qu0WXb9utrX2zSW56drhP/PFcH/eID7Xv3e0fdxv1+hv74lQL966y1Om9Noc6c/2Wjyg73NQ0hzAEl49VQ3wt4G8gBKrw2kE7AIOBBEQF4DngNOF9E5gBVwG3eKibhjmjKgamq+pWILAp32XhskzHGhENV2VJcxrzNZXywYzlfb93Hkm37Ka9zCFIGwIAubRnasyOn9OxIeslOvn3OSDpktKqzvoL9mxIYfePFq01lK9DwYZJzbZDXz8AdnfhPqwp3WWOMSZT9ZVUs3rKPRX634pJK39zDy/XNzuTUXll01oNcdMYQhuR0oL1fAiko2HtUQjkW2XkqxhgTpuqaWlbuPMiiLftYuHkfn68pZNvkT45aLrttOv06CGOH9GVY744M65VFp7bpgOuBlTsgO9GhJ4wlFWOMCWH3gXIWbN7HR18f5Hf5n7Fk637KqmrqLJOelsLQnA4M792J4X2yGNE7i16d2rBgwQJycwclKfLksaRijDFAZXUtq/dUsnjeBhZs3svCzfvYtq/Mbwl3MmG/7EyG985iRJ9OZJTs
5NvnjCI9zYZR9LGkYoxpkfaXVbFg817yNxbz1ca9LN6yj4rqWqD48DLtWqcxvHcW3VqV8c3TT2JYryyy27U+PL+gYI8llACWVIwxLcLO/eV8ubGY/I3FzF5exOY3PiHwNL2e7VMZPbgHp/XtxGl9OnH8ce1ITfFOJjyxW3ICP8ZYUjHGNDuqytYD1az6YjP5G4v5cmMxW/eW1VmmVapwaq8s8vp1YmTfzuT27cT6lUvIzR2WpKibB0sqxphmYUtxKfPXFfHp2j3MX7eHokMV+I+N1b51Grn9OjGyX2c6VBRy5TdGkdEqNXkBN1OWVIwxx6S9JZXMW1vEO/n7WTV9BluK6x6JZGWkcNbgbozq15m8fp04sXsHUg8PrLjfEkqcWFIxxhwTqmuVL9bvYe6aIuauKeTrbfvrtIl0yEjjzIHZjB7YhbOOz2bf5lXk5Z2WvIBbKEsqxpgma/OeUmavKWT2qkLmrdlNefWR0XrTU1MY2b8T/dtU8N1xwzk558iRCEDBFgm2ShNnllSMMU1GaWU1BTvKmfLuMmavLmRDUUmd+YOOa8fZg7oydnAXTu+fTZv0VAoKCjilV8ckRWwCWVIxxiSNqrKu8BCzVhUya1UhX24oprKmFtgHuCqtMYO6MG5wV7LKd3LB2aOSG7BpkCUVY0xClVZWM29NEW8U7GfZ1Jl1zloXgeM7teLiEX0Zd0JXhvXKIi3VnVxYUFCYrJBNBCypGGPiruhQBTNW7OaT5TuZu6bIO3PdyW6bztjBXRl/QlfGHN+FjauWkpt7QhKjNY1hScUYExcbi0p4Z1UJ//3VfPI37a3TU2tY7yxO6lDNNecMY2hOx8PXUAfYmPhQTQxZUjHGxISqsmLHQT5etpOPl+1k5c6Dh+elp6Zw5sBszh/SjXNP6ka3DhkUFBRwaq+sJEZs4sGSijEmarW1ysIt+/j74gP8x/RZbC4uPTyvfes0hh2XxtVnn8S4wV3rXJDKNF+WVIwxEampVb7cUMxHS3fw0bKd7DpQcXhel3bpnHdyN84f0p3RA7NZungRuafmJDFak2iWVIwxDaqqqeWzdXv4Z8F+FnwwjT2HL5cLPbPaMKKrcN2EYeT27VTnBETT8lhSMcYEVVFdw6dri/hgyU6mLt/F/rKqw/P6ZWdy4dAeXDS0O6f26uiucti/cxKjNU2FJRVjzGHlVTXMXl3IK1/sY+G70zhYUX143uBu7RiWrdx03mmc2L09InZEYo5mScWYFq6ssobZq3fz/pKdzFixi5LKI9dgP7lHBy4a2p2LTunO8ce1p6CggJN6dEhitKaps6RiTAtUVlnDzFW7+ddn+1j0zlRK/RLJqb06ckqnGm65II9+XdomMUpzLLKkYkwL4avaeu/rHUxfsatOIhnWO4uLh3bn4lN60LtzJgUFBZZQTFQsqRjTjFVW11Kwo4JXXl/E1GW76rSRDOudxfBONdxyUR69OmUmMUrTnFhSMaaZqfEuZjXl6+18uHQn+0qrgL0ADMnpwCWn5nDJqUeOSCyhmFiypGJMM1Bbq6wsqmTKu8t47+sd3vXZnV4d0rjq9AFccmoPBnRtl8QoTUtgScWYY5SqsnLnQd5ZtJ0pi7d7Q8gXA9A3O5OJp+YwcVgOh7atJjd3UHKDNS2GJRVjjjG7Sqp5euZa3lm0jdW7Dh2ent0mhe/k9WXisBxO6dnx8HkkBduSFalpiSypGHMMOFhexftf72BywVYKNu0FigDIymzFN0/pwbeG90T2rGdk3snJDdS0eJZUjGmialX5bN0eJhds4YMlOyivche2ap0qXHhKD741PIcxx3clPc27MmLxhmSGawxgScWYJmfH/jIm52/llflF7CrZdXj66f07c2Veb7pX72DM6SOSGKExocUlqYhIBvBD4C7gAVV9WURGAY8BrYBPVfUuEUkFngKGetNvVdUljV02HttkTDxV1dQyY+Vu/vfLzcxeXUitd5XEnI4ZXJ7biytye9E3252MWFCw
q541GZNc8TpS6QZUAK/4TXsemKiqm0VkjoiMBnoDaao6TkQmAI8D5zd2WVWdH6ftMiamdhyq5r8/XMkbBVsPdwNOT03h/CHdGNGxnBsuOtOGkjfHFFH/C0fHeuUiD+AuOf0OMB04HXgYGAe8Bgzwpm8HfgUM926NWlZVnwyIYxIwCaBHjx65U6ZMiWp7SktLycwMfqJYqHnxnt5cymhJZVfWKF9sK2fa+jKWFh65Lkmv9qmcOyCTcX3b0KF1yjG7fVZ2YsqIZdnhyMvLK1DVvAYXVNW43YAHgBuAzsBy4FlgBHAHcCeuOusl4D4gHVgbi2Xriyk3N1ejlZ+fH/G8eE9vLmW0hLJX7zygD767TIc9+LH2vfs97Xv3ezr4F+/rXa8v0vyNe7S2tjZuZYfSXN7bplZ2IsqIZdnhAPI1jN/9hDTUq2qxiKQDvwU2AY8C9+PGjhivqo+IyBnAyhgta0yTUFldywdLdvDXGXtYtWfO4elDcjrwvVF96MNuxp4xLIkRGhNb8Wqo7wW8DeQAFV4byCRgMlAOTFXVr0RkEXC+iMwBqoDbvFU0atl4bJMxkdhzqIJXv9jMPz/fxO6Drq2kXes0Lh2ew/dG9uGUXh0BKCgoSmaYxsRcXJKKqm4FgtW9jQxYrgq4NsjrZzRmWWOSZdP+Kl5/42v+vWgbldXuvJLB3dpxTq8UfnLpmbRtbb34TfNmn3BjGklVmb26kOfnrufTtXuAPQBMOPE4bjqrP2cdn82CBQssoZgWwT7lxkSporqGdxZt529z1x8egysjVfjuqD5cP7qfjQhsWiRLKsZEaF9pJW+uOMRtH82k0Gsv6dahNTeM7s/JrYsZd+bQJEdoTPJYUjEmTFuKS3lh3gZez99y+FK8J3Zvzy1nD2DisBzS01IoKNiX5CiNSS5LKsY0YMnW/Tw7Zx0fLNlxePiUYd3SueuS4Yw5vsvhIeaNMZZUjAlKVZm1upDHZxWztHAeAGkpwmXDc7hl7ABKt68hd1DXJEdpTNNjScUYP6rKjJW7eWLaGpZs2w+480u+N6o3N57Vn5ysNgAUbE9mlMY0XQ0mFRG5S1UfE5GxwAvA46r6TPxDMyZxgiWTLu1ac1H/Vvzn5aPpkNEqyREac2wI50jlm7ih5f8LGA/8L2BJxTQLqkrBjnIefPpTvt56JJncNm4A3z+9L8uXLLKEYkwEwkkqIiJ5QJmqbhORmngHZUy8qSqzVhXyxLTVLA6STNqkpyY5QmOOTeEklb/gBmz8f97zwviFY0x8+c5+f2LaGhZtcd1/O7ZO4Y5zT7BkYkwMNJhUVPV14HW/51fGNSJj4kBVWbyrgkf+Op8Fm10yyW6bzq3jBjCkdTFnnT4gyREa0zyE01B/papODvXcmKbus3V7+OPUVXy1cS8AndumM2nsAK47sy+Z6Wl2wqIxMRRO9dePcEPLh3puTJNUsKmYxz9Zzfx1boDHdunC7RMGc/2Z/WxwR2PiJOQ3S0T6Av2ALK87MUA27mqLxjRZa4ureOrFL5m92jX/tc9IY9LZAxjedh9nn3F8kqMzpnmr7+/aQOAHQHfcJYEFKAVujX9YxkRuxY4D/HHqaqYud0cmbdNTuWlMf344ZgAdM1tRUFCQ5AiNaf5CJhXv4lczRGSmqt6UwJiMicja3Qf507Q1vP/1DgDSU+HGMQO4dexAOrdNT3J0xrQs4fT+OicRgRgTqR2HqvnZ/y3i7UXbqFVIT0vh+6f34azOJZx71knJDs+YFimc3l8PA9cB1bgqMFVV639pkmbbvjL+PH0Nr+cXUavQKlW4ZmRvfnzO8fTo2MaquYxJonC6wFwA9FPV2ngHY0x9ig5V8PTMtbzy+WYqa2pJEbgqrxd3TBhE786ZyQ7PGEN4SWUFMBhYGedYjAmqpLKWxz5exYufbqC0sgYRuHRYDuf1qGTi+GHJDs8Y4yecpJIL5IvIbqz6yyRQaWU1L8/fyNPTCymp2g3AuScdx8/PP4GTenSw
ai5jmqBwGuqHJCIQY3yqamp5PX8LT0xbc/ga8GcM6Mx/XnAiuX07JTk6Y0x9wmmovz9wmqo+FJ9wTEumqny0dCd/+HgV64tKADilZ0e+PTCFGy86wy7ba8wxIJzqr01+j0fgzqo3JqaW7q7gob/MZ7E3cnC/7EzuuuAELh7ag4ULF1hCMeYYEU7119/9nv5dRF6NYzymhSk8WMGv313KB0vcYI9d2rXmznMHcfXI3rRKTUlydMaYSIVT/XWd39O2uPHAjGkUVeXtRdt4cMpy9pVWkZEm/PicQdw0pr8N9mjMMSycb29/v8cluBMhjYnajv1l3PfvpcxY6Xp0nT2oC9cMEi4aOyjJkRljGiuc6q8HRaQL0BdYraoH4x+WaY5UlWnrS/nXu3M4WFFN+4w0fnXJyVyZ24sFCxYkOzxjTAyEU/11C3AnsAQ4UUQeVtU34x6ZaVa27yvj7je/Zu6aA4A73+SRb59Ctw4ZSY7MGBNL4VR/3QycpqqVItIG+ASwpGLCoqq8tWAbD0xZxsHyatqnCw9/ZxiXDsuxHl3GNEPhJJVKVa0EUNUyEYlqLHERyQD+AfQC2gEPAFuBx4BWwKeqepeIpAJPAUO96beq6hIRGRXustHEZ2Kv6FAFv3hrCZ8s3wW4o5Orj1fOHd4zyZEZY+IlnKTypohMA74ARgFToyzrdKBCVUeLSB5wP66dZqKqbhaROSIyGugNpKnqOBGZADwOnA88H8GyJsm+2FbOpA/msKekkvat07h/4slcYW0nxjR74TTUPykiU4EhwBuqujDKsj4Hfioif8MdrdyHSxQ7ROR3QDowEhgAvO8lnjuBE0SkE1ATzrJRxmZipKSiml+9s5S3FriTGM86PpvfXzGMnlltkhyZMSYRRFVDzxS5FnjVN+y9uErwq1X1tYgLEukKvAK8AVwGfAp8H5gLPAOMAVJwlzFuD6wF/gAsxx0hzQtnWVU96iLkIjIJmATQo0eP3ClTpkQaPgClpaVkZgYfYj3UvHhPb0plbN5fxWOf7WPbwRrSU+AHw9pz4cBMUvzaTpri9rXUshNRRkstOxFlxLLscOTl5RWoal6DC6pqyBswK8i0afW9pp513Qw85j3uBSzCJYN+uNGPP8QdfVwHvOgtdwbwnvc47GXru+Xm5mq08vPzI54X7+lNpYw38rfoCb/8QPve/Z6e98dZ+vaMzxNWdrymN/eyE1FGSy07EWXEsuxwAPkaxm99Q9VfwbrnRNsH9APg+yIyC1ft9htgLzAZKAemqupXIrIIOF9E5gBVwG3e6ydFsKxJkIoa5e43vub/8rcA8J3TevLwZUNZsWRxkiMzxiRDQ0lliYjcA/zFe34zrsdWxFR1BzAhyKyRActVAdcGef2McJc1ibGhqIR7p+9h0/5qWqel8JtvDeXKvF7WVdiYFqyhpHIXcA8wzVt2HnY0YICPlu7krsmLOVRRTf8ubXn6mtM4OadDssMyxiRZvUlFVctx55M8kIhgTNNXU6v8ceoqnp65DoAze7XmuR+eRfuMVkmOzBjTFNhwsCZse0sq+cn/LmTumiJSU4R7LjyREZnFllCMMYfZBStMWNbvrWLi/8xj7poistum88+bR3HL2AHWfmKMqcOOVEyD3izYyn0z9lBZC8N6deSv1+aSYyczGmOCsKRiQqquqeWRD1bw0qcbAfjeqN78euIQMlqlJjcwY0yTZUnFBLW/rIo7XlvInNWFtEoVbh7ennu+c2qywzLGNHHhXE8lFbgC6KKqT4vIiaq6Mv6hmWTZUFTCzX//ivWFJWS3TeeZH+SSsmdDssMyxhwDwmmo/yeQB1zvPX8ifuGYZJu3pojLnv6U9YUlnNi9PW//+CxG9uuc7LCMMceIcKq/clT1GhGZ6T2P6noqpun7cG0JLy3+kppa5byTu/HEd4fTtrXVkBpjwhfOL0aZN7S8isgIrBtys1Nbqzz03nJeXngQgNvHD+Su808gJcW6CxtjIhNOUrkNN6x8
d+BB4Pa4RmQSqqqmlrsmL+adRdtJS4HHrhzOZSPsyozGmOiEk1S2qepVcY/EJFxZZQ0/eqWAWasKaZueyn+e2cESijGmUcKpylopIn8UkaFxj8YkzP7SKq594QtmrSqkc9t0Xpt0Bqcc1zrZYRljjnHhJJUhuEsBPyoi87yrKJpjWHFZDVc9+xkFm/aS0zGDybedyam9spIdljGmGWgwqahqhaq+DvwaWAU8FPeoTNxsLCrhvpnFrNp1kOOPa8cbPxrNwK7tkh2WMaaZaDCpiMg9IrIYd6XG94HecY/KxMXug+V87/nP2V1Sw7DeWUy+9Uwbw8sYE1PhNNSnABep6vZ4B2Pip7yqhlv/WcCO/eWckN2KV394up2DYoyJuXCqvx71TygicnZ8QzKxpqr84q0lLNy8j55Zbbh7dJYlFGNMXERzIuODMY/CxNWzc9bz1sJttGmVyvPX5dExw0YZNsbER8ikIiKfiUi2iGwQkfXebQNwRgLjM400fcUufveRG//zT98dbteRN8bEVX11IL8F9gEbVfUc30S/McBME7d5fxW/fGchqvDz8wZz4dDuyQ7JGNPMhUwqqvougIj8NWDW+3GNyMREcUkl//3pPkoqa5g4LIf/N+H4ZIdkjGkBwmmofz3g+WPxC8fEQlVNLbe/UsCukhpO6dmR319+ql1L3hiTEOGcp/JIfc9N0/PI+yv4fH0xWRkpPH9dHm3SrWHeGJMY4fT+OjPg+Zh4BGJi460FW3l5/kZapQr/NTqL7h0zkh2SMaYFCdmmIiJjgXOAfiJyvzc5u77XmORaum0/9761BIAHLh3CCWlFSY7IGNPS1Heksh3YCFQAm7zbPOCb8Q/LRKq4pJJb/1lARXUtV4/szTWj+iQ7JGNMC1Rf76+1wFoRGa6qf09gTCZCNbXKHa8tYNu+Mob1zuLBbw2xhnljTFKEU5V1l3c54UzfBFWdE7+QTKT+teQgn64tpUu7dJ659jRap1nDvDEmOcJJKpOBVGAksASoAiypNBFTFm/n3dWlpKUIT19zGj062qjDxpjkCaf313Gq+i1gJa49ReMbkgnXml0H+a83vgbgl988idMHZCc5ImNMSxdOUvElkSrcVSBzoi1MREaJyIciMkNEbvaez/HGGXvMWyZVRJ4WkdkiMl9ETvF7bVjLtgTlVTXc8dpCyqpqGNsng+tH90t2SMYYE1b11+9EJAV3ka4ngX9EU5CItPdef6mqFnrTFgMTVXWzlzBG4y4Clqaq40RkAvA4cD7wfATLNnt/+HgVK3cepF92JpNy21nDvDGmSRDVxNRmicjFuMS0C2gP/A24EzgdeBgYB7wGDACm47o0/woY7t2mh7OsqvYNUvYkYBJAjx49cqdMmRLVNpSWlpKZmRnRvHhMX7Szgt/M3UuKwKMTOtMzozquZSd6+6zs5lNGSy07EWXEsuxw5OXlFahqXoMLqmpEN+CFSF/jve5q4FnvcVdgA7AceBYYAdyBSzJPAS8B9wHpwFqgc7jLNhRHbm6uRis/Pz/iebGeXnSwXPMenqp9735P/zx9dULKTkQZVnbzLKOllp2IMmJZdjiAfA3jt76+M+qvCzaZo4dtCdca4AbvcSVQAmTghtjfBDwK3A/sBcar6iMicgawUlWLRSQ9nGWjjO2YoKrc/eYSCg9WMKpfZ3403kYeNsY0LfW1qTyIOwoIrKyPajApVS0QkVUiMh/X6H8nrhPAZKAcmKqqX4nIIuB8EZnjLXebt4pJESzbLL3yxWamrdhF+4w0/nT1cFJTrB3FGNO01JdUVqjqQ4ETRWR8tIWp6p1BJo8MWKYKuDbIa2eEu2xztPVANQ/PWA7Ao98+hZ5Zdj6KMabpCdmlWFUvDjHryjjFYkKoqK7hiS/2UV5Vy3dO68nEYVH36jbGmLhqsEuxN1pxIDujPoEe/2Q1G/ZV06dzJg9eOiTZ4RhjTEjhnKdyo9/jIUAxllQSZv7aIp6fu54UgT99dzjtM1ol
OyRjjAmpwaSiqoeTitcDK6qTH03k9pVW8rPXF6MKV53cjty+nZIdkjHG1Cuc6i//C3O0pRHDtJjwqSq/+PcSdh4o57Q+WVx+Uutkh2SMMQ0Kp/rL/1oqJbihVkycvVGwlQ+W7KRd6zSe+O4ICjeuSHZIxhjToHCqv85JRCDmiE17Snjg3WWAuyxwn+xMCjcmNyZjjAlHONVfH3vLHT7TTlUnxDOolqymVvnp/y2ipLKGb57ag8tP65nskIwxJmzhVH8tw/X2WhjnWAzwxopDLNxcQo+OGTx62Sk2+rAx5pgSTlI5Ezeg46V+026KTzgtW8GmvbyxvAQRePyqYXTMtO7DxphjSzhJZQnuaGVRnGNp0WpqlXvf+ppa4NaxAxg9sEuyQzLGmIiFk1S2AVm4a5gIbhDI2fEMqiV6a8FWVu86RNfMFH523uBkh2OMMVEJJ6nMjHsULVx5VQ1/mroagO8NbU/rtNQkR2SMMdGxYVqagH9+tont+8s5sXt7zu4T1ZUFjDGmSbBhWpJsf1kV/zNzLQB3X3QiKYe2JDkiY4yJXsih731EpI/vBgzEhmmJqWdnr2N/WRWn9+/M+MFdkx2OMcY0SrjDtCiukf4QNkxLzOw6UM6Ln24A4J6LTrRzUowxxzwbpiWJnpi2hvKqWi4a2p0RfWwEYmPMsa/e6i8ReVVEUvyei4i8HPeoWoCtB6p5PX8LqSnCXReckOxwjDEmJhpqU+mhqrW+J6qqQO/4htQyvLr0IDW1ylV5vRnYtV2ywzHGmJhoKKm0EpFM3xMRyQDaxzek5m/B5r18sa2CjFYp/PTcQckOxxhjYqahNpUngGki8r+4xvqrgZfjHVRzpqr8/qOVANx0Vn+6dbDzUowxzUe9SUVV3xCRxcB5QCvgTlXNT0hkzdTn64v5fH0x7VoJt44bmOxwjDEmpsLp/bUGWJOAWFqEJ6e74VguGdyWjm1sFGJjTPPS4MmPJna+WL+Hz9cX0yEjjYsHZTb8AmOMOcZYUkmgJ6e7A76bxvSnbSt7640xzY/9siXIVxuLmb9uD+1bp3Hj6P7JDscYY+LCkkqCPOUdpdx4Vj+7oqMxptmypJIABZv2MndNEe1ap3HTGDtKMcY0X5ZUEsDXlnLD6H6r55+tAAAbBElEQVRkZaYnORpjjImfcEYpNo2wek8lc1YX0zY9lZvtKMUY08wl/EhFRO4QkWoR6Scio0Rkjoh8JiKPefNTReRpEZktIvNF5BRvetjLNiWTl5cAcN3ofnRqa0cpxpjmLaFHKiLSD7gEmO9Neh6YqKqbvYQxGjdgZZqqjhORCcDjwPkRLtskLN6yjwU7K8hMT+WWswckOxxjjIk7cQMPJ6AgdwWqd4GfA88B/wG8AJwOPAyMA14DBgDTge3Ar4Dh3m16OMuqat8gZU8CJgH06NEjd8qUKVFtQ2lpKZmZwU9aDDbv0Xl7KdhRwWUntOUHp7ZvcPlopsdyXcksw8pOfNmJKKOllp2IMmJZdjjy8vIKVDWvwQVVNSE34FbgZ97jWcBpwHLgWWAEcAdwJ/AU8BJwH5AOrAU6h7tsQ3Hk5uZqtPLz88Oet2TrPu1793s6+Bfva+HB8rDXFen0WK4rmWVY2c2zjJZadiLKiGXZ4QDyNYzf+kS2qUwELhORWbgjj78AJwG/BRYBF+OqxfJxl255BJd4VqpqsZc0Glw2gdtTr7/OXgfAeQPa0KVd6yRHY4wxiZGwNhVVvcT32EssN+CqryYD5cBUVf1KRBYB54vIHKAKuM172aQIlk2qDUUlfLhkB61ShUsHt012OMYYkzBJ6VKsquO9hxuBkQHzqoBrg7xmRrjLJttzc9ZRq3DFiJ5kZ1YnOxxjjEkYO/kxxnYdKOfNgm2IYNdLMca0OJZUYuxvc9dTWVPLRUO727XnjTEtjiWVGNpXWsmrX2wG4Pbxxyc5GmOMSTxLKjH0j882UVJZw9mDujC0Z8dkh2OMMQlnSSVGyqtreenTDQD8
aLy1pRhjWiZLKjEybUMZe0urGN47izMHZCc7HGOMSQpLKjFQWV3LlFVu4Mjbxw/EjUhjjDEtjyWVGHhn0TaKymoZdFw7zj2pW7LDMcaYpLGk0ki1tcoz3pAsPxo/kJQUO0oxxrRcllQaafbqQtYVltA1M4WJw3KSHY4xxiSVJZVGmrumCIDx/drQKtXeTmNMy2a/go2Uv6kYgJO72FUdjTHGkkojlFRUs2z7AVJThEHZrZIdjjHGJJ0llUZYuHkfNbXK0JwOtEmzt9IYY+yXsBG+2uiqvvL6dU5yJMYY0zRYUmkEX1IZ2a9TkiMxxpimwZJKlKpqalm4eR9gRyrGGONjSSVKy7cfoKyqhgFd2to16I0xxmNJJUpHqr7sKMUYY3wsqUTpSCO9tacYY4yPJZUoqCr5G/cCdqRijDH+LKlEYX1RCXtKKunavjV9szOTHY4xxjQZllSikO/XldiunWKMMUdYUonClxus6ssYY4KxpBIF3yCSllSMMaYuSyoR2n2gnE17SmmbnsqJ3dsnOxxjjGlSLKlE6Cuv19dpfTuRZtdPMcaYOuxXMUJ20qMxxoRmSSVCllSMMSY0SyoRKK2qZcWOA6SlCMN7ZyU7HGOMaXIsqURg1Z4qahWG9uxIm/TUZIdjjDFNTsKSiohkisi/RGSmiHwhIieIyCgRmSMin4nIY95yqSLytIjMFpH5InKKNz3sZeNlZVElAKP6W9WXMcYEk5aoglS1VER+rarrRGQScDswHpioqpu9hDEa6A2kqeo4EZkAPA6cDzwfwbJxsaKoCoC8vjaIpDHGBJOwpAKgquu8hzlAMVAD7BCR3wHpwEhgAPC+iOQBdwIniEincJeNV+yV1bWs2eOOVOyiXMYYE5yoamILFLkMuBGYBMwE5gLPAGNw1XEDgfbAWuAPwHJgFDAvnGVV9fggZU7yyqNHjx65U6ZMiTju1XsquXdGMb3ap/LkhV2Pml9aWkpm5tGDS8Z7enMpw8pOfNmJKKOllp2IMmJZdjjy8vIKVDWvwQVVNWE34CbgFSDde74W6AcI8CHu6OM64EVv/hnAe5EuW98tNzdXo/HMrLXa9+739J43Fwedn5+fn5TpzaUMK7t5ltFSy05EGbEsOxxAvobxO5+w6i+viup54FPgExGpxB09TAbKgamq+pWILALOF5E5QBVwm7eKSJaNOd/16O38FGOMCS2RDfX5QLB+uCMDlqsCrg3y+hnhLhsPf75mBG9M/4JzTjguEcUZY8wxyc5TCVOr1BQGZ6fTqW16skMxxpgmy5KKMcaYmLGkYowxJmYsqRhjjIkZSyrGGGNixpKKMcaYmLGkYowxJmYsqRhjjImZhI/9lWwiUghsivLlXYCiCOfFe3pzKcPKbp5ltNSyE1FGLMsOR19VPXrgw0DhjOVit8Njl4Uc+ybUvHhPby5lWNnNs4yWWvaxtn2xvFn1lzHGmJixpGKMMSZmLKlE5rko5sV7enMpw8punmW01LITUUYsy46ZFtdQb4wxJn7sSMUYY0zMWFIxxhgTMwm7SJcxLZGI9FHVzcmOIx5E5Ie4y3in4C7zrap6U3KjMslmRyoJICLP+z2+N8LXXiAi74vIDL9bGxG5U0T+LCI3iUiwK2oGW1eKiOSKyFi/26CAZQaFen1jBCtHRH4aZLmfevcTvPs+IvK8iJwVZNmefo9TReS7IvJj7/mJ3v1R2xxh3D0bXirka1+ngYbRcNcfaj+JyH+ISMMnpMXHJOCPwAPAr717oN790UFELheR63w3b3rEn2mvjG6N3YhQsUZThoicJSLPBUzrICL3iMhvvedj/eZdLyKjY7EP63nPQ25fPFhSiZKIfENEFonICu/5w/Us7v+DcJ7fOl4SkReD3fyW/y3wS6AN8CCwEPg70Al4DxgAPBtm2B8BvwJu9G43BHltuOsK9kNXXyIIVs5VItLbW76PiPQFrvLm/9q7fxiYCTzqrfsV7/4W4CMR8a33n0AecL33/Anv
/sMg21xvrMHKEJHTRGSgiDT4nRGRF7yHnVX1wiDzQ21DfULtp9beOt4WkctE5HDtg4g8ElDuIwHxBcYbans+EZHp3h+amSIyw5v1BTANeBn3mXzZ72Wh9scnwNlAf78bhPhMh/qeicj3gWXADO/58979BhFZ73dbEMb3LGisocrwHt8R8B75nv8P8I+At/AV4CAwznt+n9+8bOAK4H9EZJqIfOWtb6KIvBf4novIN0XkpyGSUKj3PNT0+Ij32ZXN7Qb0An4OrMB9IGZ606fW85ppuB/G+4H13v39wAu4D9oS734c8BjwkN9rZ3n3M7z7T3xl+i0zM0iZlwPjvccdgN8Da4FWAcvN8HvcCVgcwXsxI+D5bO/+H8A1vuehygH24r6wM/1uxb7lgUzgk4D3Ybrf/FZ+Zfrmz/Qvzzc9SOz1xXpUGbhkPh3I8VvuuiC364Hl3vwHgcuDlB10G8J9r4PtJ+Bk3I/6du/zKfXsn8Dp8wKePxLw/I/AZUBf383vs5gTIt5Q++Ooz2qw6X6v+5wg3zNcQkv3mz49yDpHAE9z5LsV9HtWT6whywjyHvqWeRfIDLbvQr0HuCrEZ4GPgf/ypn0NDPd/z4E7gcnA3cB7EbznQafH62ZtKpG7HffPojPuR8TXJ7tdPa+pxH3AXwIqODL22CZVnS0ie1R1tjdttoh87PfaYu/f8WIR+TfQETgoIgNUdb2I9Mf9+Aa6H1gJzAIGA+OBfcA7InIbcDXw/4AuIrIe9yNUAjwFICLn4X70F6tqVYjtkoDnNSKSCXRX1VdFZJK4evf7gG4B5fwZuFpVJ9RZochM7+GXuMT9I+95K+9eReQuYJmqVomIL4YyEcnz5o8A2ohIH2CFiFyPS1huBa6N46hY/cIIVsavgWLqjpv0IG6fBr4PGd79xcB1IvLYkaJ1QKht8OINdD6uWqlzkPcPEckAvoM7AuvgxXmO9/5li8j93nqyga4i8hJwot+/9GzgQECZZwZ53hm41G/aTd7rporILo60qfj2Z+D+8B3hLfZimuVbkarOwe0P32d6AEc+0xWqukdEAr9nlUC1t/4Ugnz/VHWhiHT1fbfq+Z6FivWoMkRkGO7Hvrt4VXfee9jBe7wZmC4iH/qFsk9ErgAyROTb3nr9nYY7OpsLvO9N2wKsUdUS30IichUwRlVVXBXhDNyR/e/VtWWF2o5Q0+PCkkqEVPUXwC9E5D+BnwHVIvI23iFyCJcAaapaKSLfxf3L2OM3P19E/ob7VzYA9+/IV953ALwfobNx/7Z6Av8SkeOA/cBPgsQ5zO9xPjDK+8EegqtqAHfksk5VzwkS80O4H9EfAjtCbNdnAc+PSgSq+jfgbyIyM7AcEZkTZJ23ezHfIyL3qvfXCvdPGdy2fpsjVQgfefe3AX8AuuF+7FNw/9wFOBGv2gv3J2BCsFj9YviJV56vjA+BW3Dvxyqg0Ju+QlUfCtwAERnvbcPIINtX3za8C5ThfnQqgHJVvVRETgZGhNhPK4EpwF2q+rVX/kxcNZL/H5gVuETUC/ej6PsMlOKOwupsQsDzJbhqoEX+E1X1Ci8hZqtq4CCFt+GOjn3743ZvekcgCxjJkfdxDu67FOwz/Z6IvAf0D/iePYP7/PUB5vu2x9t29bahLe4z7hPqe+b77HQPiDVYGalAP1x1tK/qrhS40nv8FVCAS8KFXhx/Bu7F/ak7B1cNixfvcNzR5XvAhcADIvKyt/7PRCTfL/7nfN8HVT3Bbx2/DNgO33v+o4DpgdsXF3byYyOIyBDcj/Q6VS0IY/lLcfWZO4H2wO2qOtebdwEwzJv3lqoeqmc9j6jqfaGeRxD/i8CTqro40tfWs07xffC9f4mF3uNh/uWIyAuqenMMyntBVW8WkYnArbgfEqj7rznSWC/A/ai18S3b0LoC1ttFVYtEpA2uMft4vLYwVa2JYPP813kPMAa/o1JVnSAi1+KSUytc8r9VVZeLyAnAbar6H0HWdbeq/q6esgar
6mq/578OWERV9SHvPX8C2MXRn+drcG1Z6bgfzdtUdZnfd8D/NXNEZIKqzvCO1u4HXlLVT711Bf2eiUhn3Hu7SVV3edP6+sVZEpjsgn3PRCRNVatDvBdHleFNf01Vvxdkef/tawf82Nu+oL0Ave/gGrw/eMBxuD8WR/E7ygpKRD7BHSj4/hQE/Q6ISGq0n8OwxLNurTnfcP+Ec4GxvlsYr/kc6OQ97gLMiWZdhKgrr2f5i3EftMD6/+XABbhD7hm+Wwzfo7NDlH243SGCddW7Hlwd9DCOrvcPbB94xO89zwv2ngMLcNWVn+HVv0e63d7967h/hhfgOhz8LcTyL4SxzsCYHvemFwAdvceDgWl+rxkbePOmZ+Lq5/+Mq8ZKDShrUJjbF/TzXF9c9XwHQrZxhSi7A67d8PDnwZveJtS2hdrnuB/0PwJDA8o6C3d0ECyOPoG3UNvnfQ4+CrGeTOCngfECrQOWax3s9QHL+Nq++lD3O/BkQHkfxuo7Huxm1V/R+xD3j2Kv91xxH6D6lKvqXgB1/2R93SY/wtWT76tvXeK6Ip4D9AuoK29oPz6IOywPbAPIwPUuuxn4C3APdevNG+tB3L/RUGVHuq761rMFWKt+ddCewPaBMd79G7iqjJG46p0qjrznB9TVx5epa/OK9CjwQVwVW1dV9fVm+1hEFnvVRT/wW1aCxBhMqJi2AjUAqrpa6vZOu9Hv8RBc9d0cXLXgClyVy9m4RuIf+i37rBd/Q9sX6vNcX1yhXlNfG1ewsj/B/YDvDZj/d9yfpWDbFmqfDwG+BTzqHZn8Q1Wfw/XkqtPLK6AcXzVbf9zR2OgQ29dZVc8NsZ6XQ8Q7A5fUfKZz5LMbSqi2r1IReRJXpf0u7vsTN5ZUotdaVS+I8DVfePWln+I+ACv91nV+GK/fDmzk6LrywOqJOtSr2xeROm0AXt2/NPIHFBH5DNdulM+RjgsCdFPVzHrKDls923CbV4UQWAfdDdduEioBH6eqY7zGzm8C//YrLljniGDbvT7YZK9sqNv43B8YiPv3Gk2CDYxphLfdbYH53nYLRxqMUVX/uvt0jnR1DUx2hzsx+G1DvfvVe3zU51lcRwCtJ65Q34HANq4zgry//mWXqepR5zl52/ZAiG0Lus9VtQJ4XUTW4DqvPIQ7v2gL7gjxKOrXviUiWbg/ZaG2b6uIXK6qb0YQb2DnmNpgcQQIbPvytb/c630H8oGbVbW+9t9Gs6QSITnSQydUr6KQVPVuEbkIGIprXH3bm/W5uJ4ks+pbl6quBdaKyHBV/Xvg/Iao6sUBk64EnvP7sXqbED+gDfgt7ihrY8CXzf+9CVZ2xIKs51Zcw26gnrh2hlAJ2PcjWYX7p5rjV0awzhHBbNIgjed+2x3Y+HyhqhZGk2CDxPQiR/faAtjtt17/3mRt/bYxVE8rH18HjHr3a4jPc6iTS3fX8xr06I4Z+ap61D9zv/c2nF5kgT0jg+5zr73qGlzPrRdxDdsQpCeXBumYgfuMda1n+77E9QL8A0d6yQ3wXhtqX2wVkZ/jjmDOIkQ7S4BtuE4Q473n14vIRu+xAIeAX4nILzWCNsJIWUN9hKRu7xJ/Gu2O8lun/8rittNDxJCK9wOqdXumRbKOq1T1db/nP1fVx2MVY5Qx/UmDN1ZfgvsXOgD4DfBvVX0qwnXX2V6/6Q+r6i+9xymqWu+/TPEa9iMpO8z4/D9XJcDLqvqmiJwK/BXXKHwA+Il6jeIh1hN0v4o7+fUqVX0k1GsbEXuo9/YuVX3MOyIS3NFnIYCq3uRt2zO4H/kDwJ2qOs977SXAB7gjiIfx9rmI/AL33mwPKOv6wPJ9f+ZEZAN139vnVPXPUWxn0H0hIm1xPcZG4BrwH1XVnRGuu2+oeaoa7SXVGy7Xkkp0RORRdd2Lgz6PcF0X4upuff9Sok5QUZSdgvvg+npN+f7xRbOuS3G9nTI5+ryFJkFEzlbVuSLyFK57bSfcP82DqhrV0ZMc3VtshKp2
8ubNCHwPYvmeNxDX93GjMQT2wIqop1yo/SoiC3AdBl6JdewNkdC9yNbh/t2/pKqLgrwm2Hak4vaHf8+6kPvDOxI5F3gV1zhei0sGQY9ovfVfAXRR1adF5ERVXenNi7jXYmP5vgNxK0Dj2AugOd+IsAdWA+vy9eyZTxS9jRq5HR/jDtFf8m4vNmJdR50FnOz9VN9+w51vcC8wFXg6BvvP1zNrB9Am2Ock1u95A3EVAB28x/49sJYQpKdcpPsV968/Jc77K2jvREL3IkvHnQM02dsfP8Y7w72e7XgLeAeXeD/GO1sd+AaufWKF9/xh7z4f17BfhDsbvj8hend5y7+KO0/kS+/5R37zwtoXhNFDMJrvQDxu1qYSIYm+B1Z9fD17yjXKxvJGiKbDQShHnQWcLGE0MoPrGfNt3L/NfxO9wJ5ZhcAiEWmFO/Padya8qqtLj+V7Xp+teA28WrcHlm8/lYa5nlD7dTHwptdxAK+cwHGvGitU78SgvchUtRL4t4jsxjW63w3cISI31LMdoTptPIJLLG94z0/37g+o6jvizvf5HA4fjYSSo6rX+LUHpfvNq7MvRORiEdlMdD0E6wjzOxBzllQiF1UPrAaE1dsolhrT4SDIunw9fo46C1iTNxR6vY3MXo+dt3D/FG/C1WtH2s3ZJ3D/lanqqb7yfOWLN3AmMXjP6yMN98CqwO2nwycSBttPYezXctw/ed+Z5fGoSw/VlTpoLzKvfeT7uJEPnvcev4o7MlweYjtCddoINUTMSeJ63vX37sV7XSj1DZMSuC8m4sZxa2wXfAijA008WJtKlEI1ADdynY1uLI+grJh1OBCRcaHmaQNnAcebuPGS5qnXCOvXyDyDumcyr9EQdeIRlHXU/hORn6jXASAenTxCxBFqf+xW1RXB5gfbT/Wsp7uq/p+3zOFRCeJBRN7CtUc8jktePVT1dG+er5fVWuBtVVVxl5Z4Sf0atb3tuAWXZOrwElWoBvz/xFVnDsEdlS1T1ftCvS+hPuveUdIPcMOkrAPuVlXfqMtB94WIfKB+PR0lyDBH4QrS0eIuVX2svtc0hiWVFi6WHQ6aooAG+XLgkEbZIB/DmI7p91xEZqvqOO/xUR0R4lRm3P5w1dfBRCIciinE+n2fwc64z2DEnUIkhj0EJUZDJIVi1V/mjIDnR10M61imqj8RkX7A93BnYu+u9wWJcay/5xki0kZVwzl3olECesrV4n7gY91T7mHcUC91zs73ys7AjRPWVkTGahS99KL5DAbrIUiE2y1HRlGuM5ko2mciYUmlhYpTh4OmKlYN8o3SjN7zJ6i/I0IsRTMcUqRCNeDHsuxIP4OxKLuhSzPExbH2YTaxE48OB01OjBvkG6tZvOeq+hrwGjSurj9McespF0ZHhJiUHeVnMBZl13tphnixNpUWLh4dDpqSeDTIxyCmZvOe+3dEiPF6fb0T78WdkxLznnL1dEQ4DnfVx5iUHclnMEHbHZcRHA6v35KKMaapCegp1xM3rpVvgMR4n3GekF56YZSd0O2OFav+MsY0OX7n9jyFa1fojDecTgLLPqqXXgLLTvh2x0pcr1VsjDGNoao/wY32/DHuWii76n9FTCWtl16St7tR7EjFGNPUJbT3XhPqpdckei1Gyo5UjDFNll/PqYeAq3DnjMRbYC+9TcA83LhgCZGk7Y4Ja6g3xjRZyey9l8xeek2x12K4LKkYY4yJGav+MsYYEzOWVIwxxsSMJRVjmigR+VBENnpDpxtzTLCkYkwjiUiGiPxdRKaJyCwR+V4s1quqFwEvx2JdxiSKnadiTONdiLva47m+CSIyEXcp2xTcUOdXAL/EDb1xCu766UNV9TwRmQV8CeThhuT4lqoeClaQN4T6M7hrf2wHfqCqVSLyE+By3PAez6vqP2O/mcY0zI5UjGm8ecBQEfmNiPgurfuhqo5R1dG479lp3vTlwGpgLnX/1K3yxnYqAL5TT1l/AO5X1bG4a9Bf4U3/MXC1qo61
hGKSyY5UjGkkVS0SkbNxF2D6q4h8DHzlnY3dCjgJd2QBsBAYBiyg7oCF0737dbjLzoYyFPi9iIC7ZrpvCP0bgedFpBJ4QFW/bvSGGRMFSyrGxIB3nfbp3gWr3gAuA34KLAXej2BV46i/HWUd8OvAS9uq6nzgEhE5C/gT8I0IyjQmZiypGNNIInIN7hrnNbijj3uBk4EXcO0e1WGs5l0R2Q/MU9VPRKQrMBnoB5SLyDhVvRHXTvMXEakFqoCfqupyEZnnlZ+GGyvKmKSwM+qNSTKvof4GVd2Y5FCMaTRrqDfGGBMzdqRijDEmZuxIxRhjTMxYUjHGGBMzllSMMcbEjCUVY4wxMWNJxRhjTMxYUjHGGBMz/x9Melf6pvIkNQAAAABJRU5ErkJggg==\n"
     },
     "metadata": {
      "needs_background": "light"
     }
    },
    {
     "output_type": "execute_result",
     "data": {
      "text/plain": [
       "<matplotlib.axes._subplots.AxesSubplot at 0xed8c208>"
      ]
     },
     "metadata": {},
     "execution_count": 6
    }
   ],
   "source": [
    "# 图1-4：50个最常用词的累积频率图\n",
    "fdist1.plot(50, cumulative=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "output_type": "stream",
     "name": "stdout",
     "text": [
      "fdist1.hapaxes()[:50]=  ['Herman', 'Melville', ']', 'ETYMOLOGY', 'Late', 'Consumptive', 'School', 'threadbare', 'lexicons', 'mockingly', 'flags', 'mortality', 'signification', 'HACKLUYT', 'Sw', 'HVAL', 'roundness', 'Dut', 'Ger', 'WALLEN', 'WALW', 'IAN', 'RICHARDSON', 'KETOS', 'GREEK', 'CETUS', 'LATIN', 'WHOEL', 'ANGLO', 'SAXON', 'WAL', 'HWAL', 'SWEDISH', 'ICELANDIC', 'BALEINE', 'BALLENA', 'FEGEE', 'ERROMANGOAN', 'Librarian', 'painstaking', 'burrower', 'grub', 'Vaticans', 'stalls', 'higgledy', 'piggledy', 'gospel', 'promiscuously', 'commentator', 'belongest']\nlen(fdist1.hapaxes())=  9002\n"
     ]
    }
   ],
   "source": [
    "# hapaxes() 找出低频词，只出现一次的单词\n",
    "print(\"fdist1.hapaxes()[:50]= \",fdist1.hapaxes()[:50])\n",
    "print(\"len(fdist1.hapaxes())= \",len(fdist1.hapaxes()))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "output_type": "stream",
     "name": "stdout",
     "text": [
      "list(vocabulary1)[:50]=  ['[', 'Moby', 'Dick', 'by', 'Herman', 'Melville', '1851', ']', 'ETYMOLOGY', '.', '(', 'Supplied', 'a', 'Late', 'Consumptive', 'Usher', 'to', 'Grammar', 'School', ')', 'The', 'pale', '--', 'threadbare', 'in', 'coat', ',', 'heart', 'body', 'and', 'brain', ';', 'I', 'see', 'him', 'now', 'He', 'was', 'ever', 'dusting', 'his', 'old', 'lexicons', 'grammars', 'with', 'queer', 'handkerchief', 'mockingly', 'embellished', 'all']\n"
     ]
    }
   ],
   "source": [
    "# NLTK 3 中 keys() 不再按频率排序（输出按单词首次出现的顺序），旧版的这种用法已废弃；按频率取词请用 most_common()\n",
    "vocabulary1 = fdist1.keys()\n",
    "print(\"list(vocabulary1)[:50]= \",list(vocabulary1)[:50])"
   ]
  },
  {
   "source": [
    "### 1.3.2 基于细粒度选择单词\n",
    "\n",
    "-   数学公式：$\\{w|w \\in V \\& P(w) \\}$\n",
    "-   程序代码：`[w for w in V if p(w)]`"
   ],
   "cell_type": "markdown",
   "metadata": {}
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "output_type": "stream",
     "name": "stdout",
     "text": [
      "len(long_words)=  24\n"
     ]
    },
    {
     "output_type": "execute_result",
     "data": {
      "text/plain": [
       "['CIRCUMNAVIGATION',\n",
       " 'Physiognomically',\n",
       " 'apprehensiveness',\n",
       " 'cannibalistically',\n",
       " 'characteristically',\n",
       " 'circumnavigating',\n",
       " 'circumnavigation',\n",
       " 'circumnavigations',\n",
       " 'comprehensiveness',\n",
       " 'hermaphroditical',\n",
       " 'indiscriminately',\n",
       " 'indispensableness',\n",
       " 'irresistibleness',\n",
       " 'physiognomically',\n",
       " 'preternaturalness',\n",
       " 'responsibilities',\n",
       " 'simultaneousness',\n",
       " 'subterraneousness',\n",
       " 'supernaturalness',\n",
       " 'superstitiousness',\n",
       " 'uncomfortableness',\n",
       " 'uncompromisedness',\n",
       " 'undiscriminating',\n",
       " 'uninterpenetratingly']"
      ]
     },
     "metadata": {},
     "execution_count": 9
    }
   ],
   "source": [
    "V = set(text1)\n",
    "long_words1 = [w for w in V if len(w) > 15]\n",
    "print(\"len(long_words)= \", len(long_words1))\n",
    "sorted(long_words1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "output_type": "stream",
     "name": "stdout",
     "text": [
      "len(long_words5)=  19\n"
     ]
    },
    {
     "output_type": "execute_result",
     "data": {
      "text/plain": [
       "['#14-19teens',\n",
       " '#talkcity_adults',\n",
       " '((((((((((',\n",
       " '........',\n",
       " 'Question',\n",
       " 'actually',\n",
       " 'anything',\n",
       " 'computer',\n",
       " 'cute.-ass',\n",
       " 'everyone',\n",
       " 'football',\n",
       " 'innocent',\n",
       " 'listening',\n",
       " 'remember',\n",
       " 'seriously',\n",
       " 'something',\n",
       " 'together',\n",
       " 'tomorrow',\n",
       " 'watching']"
      ]
     },
     "metadata": {},
     "execution_count": 10
    }
   ],
   "source": [
    "# 聊天语料库中所有长度超过 7 个字符，并且出现次数超过 7 次的单词\n",
    "fdist5 = FreqDist(text5)\n",
    "long_words5 = [w for w in set(text5) if len(w) > 7 and fdist5[w] > 7]\n",
    "print(\"len(long_words5)= \", len(long_words5))\n",
    "sorted(long_words5)"
   ]
  },
  {
   "source": [
    "### 1.3.3 词语搭配 和 双连词\n",
    "\n",
    "**搭配**：是异乎寻常地经常出现在一起的词序列。例如：『red wine』 是一个搭配，而『the wine』不是\n",
    "\n",
    "要获取搭配，需要先从文本中提取词对（即双连词），提取双连词使用 bigrams() 函数"
   ],
   "cell_type": "markdown",
   "metadata": {}
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "output_type": "stream",
     "name": "stdout",
     "text": [
      "doubleWords=  [('more', 'is'), ('is', 'said'), ('said', 'than'), ('than', 'done')]\n"
     ]
    }
   ],
   "source": [
    "doubleWords = list(bigrams(['more', 'is', 'said', 'than', 'done']))\n",
    "print(\"doubleWords= \", doubleWords)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "output_type": "stream",
     "name": "stdout",
     "text": [
      "=============== >text4.collocations()< ===============\n",
      "United States; fellow citizens; four years; years ago; Federal\n",
      "Government; General Government; American people; Vice President; Old\n",
      "World; Almighty God; Fellow citizens; Chief Magistrate; Chief Justice;\n",
      "God bless; every citizen; Indian tribes; public debt; one another;\n",
      "foreign nations; political parties\n",
      "None\n",
      "=============== >text8.collocations()< ===============\n",
      "would like; medium build; social drinker; quiet nights; non smoker;\n",
      "long term; age open; Would like; easy going; financially secure; fun\n",
      "times; similar interests; Age open; weekends away; poss rship; well\n",
      "presented; never married; single mum; permanent relationship; slim\n",
      "build\n",
      "None\n"
     ]
    }
   ],
   "source": [
    "# collocations() prints the collocations itself and returns None, so it is called\n",
    "# directly; wrapping it in print() only appends a stray 'None' line to the output.\n",
    "# Defaults: num=20 (maximum number of collocations shown) and window_size=2\n",
    "# (number of tokens a collocation may span).\n",
    "show_title(\"text4.collocations()\")\n",
    "text4.collocations()\n",
    "show_title(\"text8.collocations()\")\n",
    "text8.collocations()"
   ]
  },
  {
   "source": [
    "### 1.3.4 计算其他东西"
   ],
   "cell_type": "markdown",
   "metadata": {}
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "output_type": "stream",
     "name": "stdout",
     "text": [
      "文本中单词长度序列： [1, 4, 4, 2, 6, 8, 4, 1, 9, 1, 1, 8, 2, 1, 4, 11, 5, 2, 1, 7, 6, 1, 3, 4, 5, 2, 10, 2, 4, 1, 5, 1, 4, 1, 3, 5, 1, 1, 3, 3, 3, 1, 2, 3, 4, 7, 3, 3, 8, 3]\n",
      "=============== >查看文本中单词长度的分布< ===============\n",
      "    3     1     4     2     5     6     7     8     9    10    11    12    13    14    15    16    17    18    20 \n",
      "50223 47933 42345 38513 26597 17111 14399  9966  6428  3528  1873  1053   567   177    70    22    12     1     1 \n",
      "None\n"
     ]
    },
    {
     "output_type": "display_data",
     "data": {
      "text/plain": "<Figure size 432x288 with 1 Axes>",
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAZAAAAEKCAYAAAA8QgPpAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvhp/UCwAAIABJREFUeJzt3Xd8VHW+//HXZ9JIAiRAKAEpgmBDQRKqCPZF117WstjWhbWs5bpF713dclev9+7quqvrehV7XX520LWiglKEIE1REUUpoQUklBRSPr8/ZlDkIsxMMjkzyfv5eBwnc/iez3yCYd45Z875HnN3REREYhUKugEREUlNChAREYmLAkREROKiABERkbgoQEREJC4KEBERiYsCRERE4qIAERGRuChAREQkLulBN5BIBQUF3qtXr7i2raysJDs7u0GvrxrJ1YNqqEaiayRDD41RY+7cuWXu3nGvA9292S5FRUUer5KSkri3VY3k7EE1VCPRNZKhh8aoAZR4FO+xOoQlIiJxUYCIiEhcFCAiIhIXBYiIiMRFASIiInFRgIiISFwUILtRW1cfdAsiIkkvYQFiZlVm9k5kOd3MhpjZNDObaWa3RcakmdndZjbVzGaY2SGR9VGPbWz19c4Z98xgwgebWbelKhEvISLSLCTySvQ17n7kjidmtgA42d2XR8JhBNAdSHf30WZ2NHA7cDwwIYaxjWreik18uKqchQ5T//QOl47cl/Gje9O2VUZjv5SISEqz8EWHCShsthQoBcqAPwIPAEOBm4HRwFNAb2BKZNxNwMDIMiWase7eczevOx4YD1BYWFg0efLkmHtfXl7DYwvK+WBtLQCtM40zD2jNmP1yyEyzqOtUVFSQk5MT8+s3xxrJ0INqqEaiayRDD41Ro7i4eK67F+91YDSXqzdkAc4AXgEWA/cChwFXAdcAdwIPAb8BMoGlQPtox+7ttRs6lUnJlxv97P+d4T2vf8l7Xv+SD/uvN33i7OVeU1sXdY2Gai41kqEH1VCNRNdIhh4aowZJNJWJAcsib/q3AvOBE4EZQAng7n4LMAj4xN03Rjs20Y0X9WzHxPHDeOiSwRxY2JbV5VX8+tmFjPnbu7z64ZodASki0iIl5DMQM+sEPAtUA+uAK4FngKeBKuANd59jZvOB481sGlADXBYpMT6GsQllZhy1fydG9+3I5IWl3P76Epau28plj89lYPd8rh9zAMP7dGiKVkREkkpCAsTd1wFH7LL6LWDwLuNqgLG72T7qsU0lFDJOHdiNE/oX8s85y7lzymfMX7GJ8ybMYlS/jvz6B/vTv1teUO2JiDQ5XQcSo8z0EBcO78XUXx3FL47rR+usdKYtWc9Jd73HVU/N48uybUG3KCLSJBQgccrNSueqY/oy7ddH8dOR+5KZFmLyglKO/ctUbnxhka4hEZFmTwHSQO1zM7nxpIN4+1dHcnbRPtS78/is5Yz+0zu8slR7IyLSfClAGkm3/Gz+fPYAXrt2FMcf1JnKmjrun7eFtz9dF3RrIiIJoQBpZH07t+G+C4v55fH9APi3ifNZ+XVFwF2JiDQ+BUiCXHHkfgzqksWmihqufOIDqmvrgm5JRKRRKUASJBQyrh6aR7f8bBasLOfmlz4OuiURkUalAEmgNpkh7hk7iMy0EI/N+ooX568KuiURkUajAEmwQ/fJ57cnHwTADc8uYsnaLQF3JCLSOBQgTeDHQ3tw+mHdqKyp47LH57K1ujbolkREGkwB0gTMjFtO70+/zq35Yv02bnh2oSZiFJGUpwBpIjmZ6dwztojczDReWriaR2Z8GXRLIiINogBpQn06tuZPZw0A4JZ/fcwHy78OuCMRkfgpQJrYDw8t5JLDe1FT51z5xAds2FoddEsiInFRgATg3084kEE98lldXsW1E+dTV6/PQ0Qk9ShAApCZHuLuHw+ifW4m735Wxp1TPgu6JRGRmClAAlKY
l82d5x6GGdz51me8o0kXRSTFKEACNLJvAdcd2w93uHbifFZtqgy6JRGRqClAAnblUftx5P4d2VRRwxWadFFEUogCJGChkHHHjwaGJ11csYlbXtakiyKSGhQgSaBdbib/+HF40sVHZ2rSRRFJDQqQJDGgez43RSZd/PfnFvGZJl0UkSSnAEkiY4f24LSBXanYXsflT3zANk26KCJJTAGSRMyM/zrjEPp1bs3SdVu54blFmnRRRJKWAiTJ7Dzp4uQFpby4RPdTF5HkpABJQn06tuZ/zjoUgMcWbuHPr32iPRERSToKkCR10qFd+dOZhxIyuPvtz/nl0wupqasPui0RkW8oQJLYjwZ354bD25GdkcazH6zk0kdK9MG6iCQNBUiSKyrM4qnxw2ifm8m0Jes5975ZrN+iKeBFJHgKkBQwsHs+z10+gh7tc1i0qpwz75nBsrJtQbclIi2cAiRF9CrI5dnLR3DoPnks31jBmffMYP6KTUG3JSItmAIkhXRsk8VT44Yxql9HNm7bznn3zeKtT9YG3ZaItFAJCxAzu8rMas2sl5kNMbNpZjbTzG6L/Hmamd1tZlPNbIaZHRJZH/XYlig3K50HLirmzEH7UFlTx7hH5zJxzvKg2xKRFig9EUXNrBdwEjAjsmoCcLK7L4+EwwigO5Du7qPN7GjgduD4GMe2SBlpIW47+1C65GVx99ufc/2zi1hTXs3Vx+yHmQXdnoi0ENbYF6hZ+B1sEvAL4D7g34AHgKHAzcBo4CmgNzAFKAVuAgZGlinRjHX3nt/z+uOB8QCFhYVFkydPjuv7qKioICcnJ65tm7LGq0sruH/eZhw4rnc24w5rS1rouyGSDN9LMvSgGqqR6BrJ0ENj1CguLp7r7sV7HejujboAPwOui3z9DjAIWAzcCxwGXAVcA9wJPAT8BsgElgLtox0bTS9FRUUer5KSkri3beoaryxa7f1+8y/vef1LfunDs72iujaQPhK5vWqoRirUSIYeGqMGUOJRvMcm4jOQk4HTzOwdwnsU/wAOBG4F5gMnEj60VRLOL78lEjKfuPvGSEDsdWwC+k5ZY/p34YmfDiUvO4M3P17H+ffPYuO27UG3JSLNXKMHiLuf5O6j3P1IwiFwLnAM8DQwDZjp7nMIH5rKNLNpwC2ED3VB+PBTtGMlorhXe569fDjd8rOZt3wTZ90zgxUbNRGjiCROQj5E3yESIgBfAoN3+bMaYOxutnkr2rHyXft1asNzV4zgogdn88maLZxxzwweunjw3jcUEYmDrgNpZjq3bcX/u2w4w3t3YP2Was65dyYfl+lwlog0PgVIM9S2VQYP/2QwJw/oyrbtdTy+ULfHFZHGpwBpprLS0/jvMw4hOyONTzbUsHyDPg8RkcalAGnGcrPS+cHBnQF4Yf6qgLsRkeZGAdLMnXZYNwBemLdKdzUUkUalAGnmRu5XQH5WiC/KtrFwZXnQ7YhIM6IAaebS00KM7NEKgOfn6TCWiDQeBUgLMKpnNgCTF5Tqvuoi0mgUIC1A7/x09uvUmg3btvPeZ2VBtyMizYQCpAUwM06PfJiuw1gi0lgUIC3EKQO6AvD64jVsra4NuBsRaQ4UIC1E9/Y5DNm3PVU19bz64Zqg2xGRZkAB0oKcvtM1ISIiDaUAaUFO7F9IZlqI6Z+Xsaa8Kuh2RCTFKUBakLycDI4+oBPuMGmB9kJEpGEUIC3M6YN2nI1VGnAnIpLqFCAtzJH7dyQvO4OPV2/mkzWbg25HRFKYAqSFyUpP44eHFgK6JkREGkYB0gKdETkb68V5pdTXa4ZeEYmPAqQFKurZjn3aZbNmcxWzlm0Iuh0RSVEKkBboO1ObfKDDWCISHwVIC7XjRlOvfLiGqpq6gLsRkVSkAGmh+nRszYB98thaXcubH68Nuh0RSUEKkBbsNB3GEpEGUIC0YCcP6EpayJi6ZD0btlYH3Y6IpBgFSAtW0DqLUX0LqK13Xl60Ouh2RCTFKEBauNN0oykRiZMC
pIU7/qAu5GamMW/5JpaVbQu6HRFJIQqQFi47M40x/cNTm+g+ISISCwWIfHujqfmrcNfUJiISHQWIMLxPBzq3zeKrDRV8sHxT0O2ISIpQgAhpIePUgbrdrYjERgEiAJwWCZCXFpayvbY+4G5EJBUkLEDM7FUze9vMZppZfzMbYmbTIs9vi4xJM7O7zWyqmc0ws0Mi66MeK43jwMI27N+5DV9X1DBtyfqg2xGRFJCwAHH3Me5+FPAKcDgwARjr7sOBIWY2AjgLSHf30cCNwO2RzWMZK43AzHa63a0OY4nI3lmizroxsx8CfwDygdHAZGAocHPk+VNAb2AKUArcBAyMLFOiGevuPXfzuuOB8QCFhYVFkydPjqv/iooKcnJy4to2VWuUVdRx2cvrSQ/BA6d0Ijfj298vGtpHqv1dqIZqpGoPjVGjuLh4rrsX73Wguyd0AU4kHB6LgXuBw4CrgGuAO4GHgN8AmcBSoH20Y/f22kVFRR6vkpKSuLdN5Rrn3jvTe17/kk+cvbxR+0jFvwvVUI1U7KExagAlHsX7e1N8iL4JqIi86d8KzI+EygygBHB3vwUYBHzi7hujHdsEvbc4Ow5jPTdvZcCdiEiyS09EUTPbF3gYqAI2A9cC+wNPR9a94e5zzGw+cLyZTQNqgMsiJcbHMFYa0Zj+XbjphQ+Z9cVGVm2qpFt+dtAtiUiSSkiAuPsywp9d7GwVMHiXcTXA2N1s/1a0Y6VxtW2VwbEHdeblhauZNL+Uy4/sE3RLIpKkYj6EZWaHm1mrRDQjyeH0gTvOxlqpqU1E5HtFFSBmNiXyeAPhM6AeTWRTEqzR+3ekXU4GS9ZuZfHqzUG3IyJJKto9kB2XJp8QWTolph1JBhlpIU4e0BXQ1CYi8v2iDZBaM/s7MC1yildaAnuSJLDjRlMvzi+lrl6HsUTk/4r2Q/RLgGOAiZHnDySmHUkWh3XPp1eHHL7cUMGMz8to2GVNItIcRbUH4u5r3P0Jd6+NPH84oV1J4MxMt7sVkT2K9kP0B/b0XJqnHTP0vvbhGqo0Q6+I7CLaz0D23eX5/o3diCSfXgW5DOqRz7btdcwprQ66HRFJMnsMEDM7ycweAg4wswcjy4uEry6XFuCMQfsA8MrSioA7EZFks7c9kNnAI8DayOMjhGfIPSXBfUmSOP2wbuRlZ/DphhrmfrUx6HZEJInsMUDcfZ27vwP8092nRpY5Oz5Ml+YvNyudscN6ADBh2rKAuxGRZBLtZyD3mNmZZnbhjiWhXUlSuWh4L9JD8NriNXxZti3odkQkSUQbIK8DRxD+MH3HIi1Ep7atGNUjG3d44D3thYhIWLQXEla6+7UJ7USS2sn9cnjry0qenruC647rR7vczKBbEpGARbsHssDMfmtmo3YsCe1Kkk6PvAyO2r8jVTX1PD7rq6DbEZEkEG2A5BE+bHVJZLk4UQ1J8ho3qjcAj8z8kqqaumCbEZHARXUIy90vSXQjkvyG9+5A/25t+XDVZl6Yt4pzh/QIuiURCVC0U5ksM7MvdlrmJroxST5mxrgjwnshE979gnrN0ivSokU7meK+7t7b3XsDZwKzEtuWJKsTDymkW342n6/fxtufrgu6HREJUMy3tHX3eUDHBPQiKSAjLcQlh/cC4L5pXwTbjIgEKtpDWG+b2VuRx9mArkRvwc4Z3J02Wem8v2wjC1ZsCrodEQlItHsgF/Pt2Vcnuvv5iWpIkl+bVhmcPzQyvcm72gsRaamiDZB1wGnAL4FTzEy3tG3hLj68F+kh41+LVrNio2bqFWmJog2QR4B2wEtAb+DehHUkKaEwL5tTBnSl3uHB6ZreRKQlijZAOrr77939NXe/EeiTyKYkNfw0ckrvxDkrKK+oCbgbEWlq0QZInZn1BjCzfYGcxLUkqeKgrm05om8BFdvreGK2pjcRaWmiDZDrgMfNbCnwTOS5yDcXFj48/UuqazW9iUhLstcAMbMngQ/dfYS77wcMBsYlvDNJCUf0LeCA
Lm1Yt6WaSfNLg25HRJpQNHsghe5ev+NJ5OvuiWtJUsmu05u4a3oTkZYimgDJMLNvPvMws1ZAm8S1JKnm5AFd6dw2iyVrtzJ1yfqg2xGRJhJNgPwVeNPMrjazq4ApwMMJ7UpSSmZ6iEsOD9+kUhcWirQcew0Qd38GuIjw9CUh4Bp3/0eiG5PUct6QHuRmpjF96QY+Ki0Puh0RaQLRzsb7mbv/w93/5u4lextvZjlm9nhk7qz3zWx/MxtiZtPMbKaZ3RYZl2Zmd5vZVDObYWaHRNZHPVaSQ152BucMDk9vcv+7urBQpCWIeTbeaLh7BfA7dz8KeAC4ApgAjHX34cAQMxsBnAWku/to4Ebg9kiJWMZKkrjk8F6khYzJC0op3VQZdDsikmCW6LNmzOz3gAOnAkOBm4HRwFOEp0WZApQCNwEDI8uUaMa6e8/dvN54YDxAYWFh0eTJk+Pqu6Kigpychl0v2RJr/GXWJqavqOKUfjlcNKBtID2ohmoEVSMZemiMGsXFxXPdvXivA909YQvhCRhfBDoDiwnPoXUYcBVwDXAn8BDwGyATWAq0j3bs3l6/qKjI41VSUhL3ti25xoIVX3vP61/yg3/7qpdXbg+kB9VQjaBqJEMPjVEDKPEo3uMTcggLwMx+ApwNnO3uayNv+rcC84ETgRlASTjD/BZgEPCJu2+Mdmyiepf4HbpPPsN6t2drdS0TZ68Iuh0RSaD0RBQ1s2LCn2NMB143s+2EDys9DVQBb7j7HDObDxxvZtOAGuCySIlYxkqSGT+qN7O+2MiD05dx8eG9yEhL2O8pIhKghASIh8/U2t09QwbvMq4GGLub7d+KdqwknyP7dWK/Tq1Zum4rLy9czWmHdQu6JRFJAP1qKI0uFDLGHRG+sPC+aZreRKS5UoBIQpw6sBsFrbNYvHozMz7fEHQ7IpIAChBJiFYZaVw8InyW9X3TNL2JSHOkAJGE+fHQnmRnpDF1yXqWl+uOhSLNjQJEEqZdbiZnF+8DwKQlFQF3IyKNTQEiCXXpyH0xg3e/qmR1uaY3EWlOFCCSUD075DLm4C7UOpz69+m8smi1zsoSaSYUIJJwvz35IPbvkMG6LdVc/sQHjHu0RJMtijQDChBJuMK8bG4+qj1/PK0/bbLSefPjdRz3l6k8+N4y6uq1NyKSqhQg0iRCZlwwrCdv/mI0J/TvwrbtdfznS4s54x/TdQMqkRSlAJEm1bltK+4ZW8SEC4spzGvFgpXlnPL36dz6r4+p3F4XdHsiEgMFiATiuIM688Z1o7l4RC/q3bl32hcc/9epTF2yPujWRCRKChAJTOusdH5/ysE8f8XhHFjYlhUbK7nowdlc8895lG2tDro9EdkLBYgEbmD3fCb9/HBuOOEAWmWEeHF+KcfcPpX/V7JCp/yKJDEFiCSFjLQQl43uw+vXjuaIvgWUV9bw62cWct6EWXyxfmvQ7YnIbihAJKn06JDDoz8Zwt/OHUiH3ExmfbGRMX97l7umfEaNTvkVSSoKEEk6ZsapA7vx5nWj+VHxPmyvref2N5bwyzc2sKxsW9DtiUiEAkSSVrvcTP501gCeHDeUfQtyWbm5lvMnzGLFRk3MKJIMFCCS9Eb0KeClq0ZyYEEGq8urOP/+WZoKRSQJKEAkJeRmpfMfI9sxoHs+KzZW8uP732fd5qqg2xJp0RQgkjJyMkI8eskQDu7almVl2zj//vd1vYhIgBQgklLycjJ47NKh7N+5DUvXbWXs/e/z9bbtQbcl0iIpQCTltM/N5PGfDqV3x1w+WbOFCx+cTXmlbpkr0tQUIJKSOrbJ4smfDqNnhxwWrSrn4odms7W6Nui2RFoUBYikrC55rXhy3DC65Wczb/kmfvLQHCq2K0REmooCRFJat/xsnhw3lC5tWzH7y42Me7SEqhpNCy/SFBQgkvJ6dsjliXFDKWidxfSlG7j88blU1ypERBJNASLNQp+OrXly3FDa52by9qfruerJedTU1QfdlkizpgCR
ZqNf5zY8dukQ2rZK5/XFa7l24nxqFSIiCaMAkWbl4K55PHbpUFpnpfPywtX86pmF1GkWX5GEUIBIszOgez4PXzKYnMw0np+3it88v4h6hYhIo1OASLNU3Ks9D1w0mKz0EP+cs4LfT/5IdzcUaWQJCRAza2VmPzezL83s4si6IWY2zcxmmtltkXVpZna3mU01sxlmdkisY0W+z/A+HZhwYTGZaSEenfkVt7z8sUJEpBElag+kM1ANPLHTugnAWHcfDgwxsxHAWUC6u48GbgRuj2OsyPca1a8j94wdREaacf97y3jyw60KEZFGkpAAcfev3H0CUANgZu2AOmC1mf0PkAkMBkYAL5tZMXANsH8sYxPRuzQ/xxzYmbvOO4y0kPHcJ9v4j+cXsb1WZ2eJNJQl8rcxM/s98CUwCXgPeBf4X2Ak4fDqA7QBlgJ/BhYDQ6Id6+777eY1xwPjAQoLC4smT54cV+8VFRXk5OTEta1qJGcPM1dW8bf3N1FTDwcWZPCrEe3Iy4r9d6hk+F5UIzlrJEMPjVGjuLh4rrsX73WguydsAX4PXBz5einQCzDgFcJ7FRcCD0b+fBjwUqxj97QUFRV5vEpKSuLeVjWSswd393++PtOH3PKG97z+JR9x6xRfXFoeSB+q0TxrJEMPjVEDKPEo3uPT446oPTCzfYAXgK5AtZkdTXiv4GmgCnjD3eeY2XzgeDObRvhw12WRErGMFYnafu0zmPTzkYx/bC4LVmzizHtmcMc5A/nBwV2Cbk0k5SQkQNx9JbC73Z/Bu4yrAcbuZvu3oh0rEqvObVsxcfwwbnh2IS/ML+Vnj83lF8f14+dH74eZBd2eSMrQdSDSIrXKSOOOcwZy/ZgDMIPb31jCVU/No3K7JmEUiZYCRFosM+PyI/sw4YJicjPTeGnhan5070xWl1cG3ZpISlCASIt37EGdef7Kw+nRPnx3w5Pvms4Hy78Oui2RpKcAESE8k++LVx7O8N4dKNtazbn3zuLZuSuDbkskqSlARCLa5Wby6KVDuGBYT7bX1fOLpxdw678+1my+It9DASKyk4y0EH88rT83n9af9JBx77Qv+Okjc9hcVRN0ayJJRwEishtjh/XksUuHkp+TwdufrueMf8zgy7JtQbclklQUICLfY3ifDky6ciT9Ordm6bqtnHr3dKYvLQu6LZGkoQAR2YMeHXJ49vIRHHtgJ8ora7jwwdm8/Nk2zegrggJEZK/atMrgvguKueLIPtTVOw/O38Klj5SwdnNV0K2JBEoBIhKFUMj49ZgDuOu8w8jNMN76ZB3H3zGNF+at0t6ItFgKEJEYnDygK3f8oIAj9+9IeWUN106cz2WPz2X9luqgWxNpcgoQkRh1yE7joYsH8z9nHkLrrHRe+2gtP/jrNF5euDro1kSalAJEJA5mxjmDe/Dav41i5H4FbNy2nSuf/ICfP/kBX2/bHnR7Ik1CASLSAN3ys3ns0iH88bT+5EQmZDzujmm8/tGaoFsTSTgFiEgDmRkXDOvJq9eMYsi+7SnbWs34x+Zy3cT5lFfoCnZpvhQgIo2kR4cc/jluGDeddBBZ6SGem7eK4/86lXc+XRd0ayIJoQARaUShkHHpyH351zVHcFiPfNZurubih+Zww7ML2aL5tKSZUYCIJECfjq155rIR3HDCAWSmhfjnnBWM+eu7mgpFmhUFiEiCpIWMy0b34aWrR3JItzxWbarkx/e/z00vfEhlbX3Q7Yk0mAJEJMH6dW7Dc1eM4Lrj+pEeMh6b9RVXv1rGX95YwoqNFUG3JxI3BYhIE8hIC3H1MX158eeHc2BhWzZW1nPnlM8Y9ee3GXv/+7w4fxVVNXVBtykSk/SgGxBpSQ7umsfLV43kkVdnMn9zNq98uIb3lpbx3tIy8rIzOG1gV84u7k7/bnlBtyqyVwoQkSYWChmHds7ikhMP4z8rapi0YBUTS1bw4arNPDLzKx6Z+RUHd23LOYO7c+qAbuTlZATdsshuKUBEApSX
k8EFw3txwfBefFRaztMlK3l+3io+Kt3Mb1/8iJtf/pgxB3fhR8XdGdGnA6GQBd2yyDcUICJJ4uCueRx8Sh43nHAAry9ey9MlK3hvaRmTFpQyaUEp+7TL5uyi7pxVvA/d8rODbldEASKSbFplpHHKgK6cMqArK7+u4Jm5K3m6ZCUrv67kjjeX8NcpSxi5XwH987aT1aWcfp3bkJmu82Gk6SlARJLYPu1yuPbYflx9dF9mfL6BiSUreO3DNbz7WRnvAveUvEdGmtG3UxsO7to2vHTL48DCtrTO0j9vSSz9hImkgFDIGNm3gJF9C9hUsZ1JC0p5de7nrK5KY1nZNhav3szi1Zt5em54vBn06pDLQYVtOWhHsHTNo2ObrGC/EWlWFCAiKSY/J5MLh/fi4MwNFBUVsbW6lo9Xb+ajVeUsXr2Zj0o3s2TtFpaVbWNZ2TZeXvTtja46tcn6JkwO6tqW6vIa+lXV0KaVzvSS2ClARFJc66x0Bvdqz+Be7b9Zt722ns/WbeGj0s0sLt3MR6XlLC7dzLot1az7dD1vf7r+2wKvv06brHQK81tRmJdN18hjYV4ruuaHHwvzssnOTAvgu5NkpgARaYYy00Phs7q6fntBYn29s3xjBR9FAuWj0s18VrqRjdXOlupatqzdypK1W7+3ZrucjO8GTH4ruuZls2ldNW3XbqGgdRb5ORmY6VTjlkIBItJChEJGr4JcehXk8sNDCwGYO3cugwYNYlNFDaXllazeVMXq8kpKy6tYvSnyWF7JmvIqvq6o4euKGhav3vx/i0+dBkB6yOjQOpOObbIoaP3tEn6eScfWWRS0yaJj6yzysjN0XUuKS6kAMbM04E6gP5AB/MzdFwXblUhqMzPa5WbSLjfzO3ssO6uvd8q2VX8bMJuqWLO5itJNlXxRWkaVZVK2pZrNVbWs3VzN2s3Ve33dHWFT0DqL+u2VtJ83i4y0EJlpITLSQ2SlhchIC5GRbmSmpUUev/3zjLQQmekhMtOMjLQQy5dXUppeSlrICJmRFjLSQuHvLy3yfOf1Ifv2+Y7H5eU1tFm7BSN8IkLkb+ibry3y9/Xt19+MwAzWbqv9ZoJMsz2P3bGeXdaXV9WxYeve//72pLy6nu219Qk/vTulAgTZUDbNAAAJcElEQVQ4C0h399FmdjRwO3B8wD2JNHuhkNGpTSs6tWnFgO753/mzuXPnUlRUBEBVTR0btm2nbEs1ZVurWR95LNu6nfU7P99d2JRtaHij789reI3XpzVs+3+93fAeJr/Z4BIPdyjjyP07NbyXPTB3T+gLNCYz+xswBSgFbgIGunvPXcaMB8YDFBYWFk2ePDmu16qoqCAnJ6dB/apGcvWgGslVo6bOKa+uZ1NVPZu3VZKWmUVtvVNbD7X1Tk1d5HHndZHH2vrw9juP315bi4XSqXen3oksjn/zNd/5M+fb53U71tXXY2bseFf0b/4Tfvjm3dLZ7Zh6r/92VyOG7XYe4+409MCeA9cNy+fQzvGdtl1cXDzX3Yv3/kLuKbMQPnz1EPAbIBNYuqfxRUVFHq+SkpK4t1WN5OxBNVQj0TWSoYfGqAGUeBTvyak2/0EJ4O5+CzAI+CTgfkREWqxU+wzkKeB4M5sG1ACXBdyPiEiLlVIB4u41wNig+xAREd3SVkRE4qQAERGRuChAREQkLgoQERGJiwJERETiklJXosfKzNYDX8W5eQFQ1sAWVCO5elAN1Uh0jWTooTFq9HT3jnsb1KwDpCHMrMSjuZRfNVKmB9VQjUTXSIYeGqtGNHQIS0RE4qIAERGRuChAvt99qtGoNZKhB9VQjUTXSIYeGqvGXukzEBERiYv2QEREJC4KEBERiYsCJImZWd+gexAR+T4KkAgzO8jMxplZFzN7wcyeMrPCRqj7cAxje+y8AI+YWfc4XrOvmd1hZleZWVpk3asx1jg18tjLzJ40s7+bWV6MNS6JPJ5sZl+Y2UIzOyqG7T81sxvNrHUsr7tLjdMj/z8H
mtkVZvaBmZ0WY42+ZjbRzBaZ2VtmdrWZxXzXUTMbFvl/cpWZFcW6/ffUHBHHNnk7fi4iz89twOsPNLMD4t1+pzq3NHD7vmbWJY7tLPKYbWY/M7MLd/67ibLGeWbWJtbX3qVGlpkNjHydb2anmFnM9wlOxM/YHl9PH6KHmdl04FXgAuAaYBNwrbufE0ONC3ddBfzW3ftEuX0NMJ1vb488EJjn7kdH20OkzkzgQaBrpMbZwBvuHsub91vufrSZPQM8Qfh7OcPdo74fi5m96e7HmtlU4EzCNwF7Ido+zOxd4BHg54RvJnaXu1dE+/qRGvOBS4GbgTrgZ8BT7j4qhhpvAzcBnwN/BWYAue7+XzHU+BMwFMgAngROBGa4+83R1vieurPdfUgM4/8IXAhUApe7+9s7/l/HUOM24CTggcjjNuAZd38whho7jzXgB8Cr7v6TGGo87+6nm9lVhO8T5MBt7v5MDDWmuPsxZvYIsBaoBvZx90tiqLECWAM8B9zp7tui3XanGi8Q/oV+KTAM+AhoG+P7T0J+xvZEeyDfqnL3PwLl7v6Ku88E2sZY4w9AX2DfyNILiOWu9tcBVcC4yJvs/FjDI6LS3Se4+x+AhyNLzL8xRxS4+/Pu/hzQKcZtt5tZB2Cru5e5eznhe9lHq8bd7yf8j6IGmGNmv4ixh63uPhdYSTi8VhEOkliYu7/n7qsJ/338DTgmxhqHu/toYBrwLvBD4LiYmjD7PLInt2NZBhwSYx9jCP9sDgN+aWZHxLg9kW0HAb8g/D38EIh1L+YIwm+6DxP+JWFt5OtY7NgjPitS7wjg8hhr7Njb6Obuv3b3m4BY9/qXAiOALcAsM/uVmWXHWKOzu58C5AAT3X0csNepRHbR4J+xWKXUHQkTbHvk8Xc7rYv1h+A2wkH0wI4VZnZStBu7+11mNgm4y8zeIv43/VVmtq+7L3P3FyNv4vfGWGO4mX0B1O60rlWMNX4HTAYyzOweoCcwO4btDcDdq4G/mNn9hEM2Fqsiv122Ac4ys35AfYw1vor85m7Aqhi33aHWzLoBNwJ3AbcS+8/XbMJ7tJ/tWGFmc2KssZVwIG4ys7OB5wn/f4lFtbtXmNk0d98e6SOmwz6E94z/Gzgf+BXwtbtPi7FGYWSvP2enPmL9NzM9sgez3sw6EQ7XWP+/eORuqX+P7FldC3wAHBhDDTOzfQn/QpBpZrlArIduG+NnLDburuV7FqBXjOOzgB67rOsW52uPBybHuW32btYVNcLfR8c4tmkNnACcAwyKcdszG6HnLGAU4T2fAuAnQJcYa6QDFwGXEX6zgvBvjLHUGALMB0oIH55YAhwZY41DgYt2WfeHGGv8kPAhmh3PC4BJMdY4bzfrYupjp+1GAW8T3tuOddvfRZZrdlp3fYw10oD/AN4HPgZeBA6Nscb/+XcK5MdY42TgLcKHnS4nPAnslTHWGNzQn7FYF30GItKEzCyf8B7A12b2sLtf3MB6KV8jcrhnBHBBqn8vQdaw8FmbVwIbgP919/Vm9qq7j2lIH3uiQ1giTWDXEywiR1pi+vzhe07SUI3GrUGq9gE8SvjEhm7AfZFDlLF8BhszBYhI0/gD8Djf/Uwp1n/cqqEae1Lp4ZNOdpyG/zDxf44aFQWISNNo0AkWqqEaUWiMk2dios9ARJqAmWUR/uB9+U7runn4tGLVUI3GqJHt7pW7rCvy8GnsCaEAERGRuOhCQhERiYsCRERE4qIAEQmYmb1iZl+a2cVB9yISCwWISJTMrJWZPWJmb5rZO2Z2XmPUdfcTiH0eKJHA6TRekeiNIXyu/bE7VpjZycD1hH8ZW0d4Yr8bCV/MdQgwE+jv7seZ2TuE57MqJjxz7KnuvnV3L2RmvYD/JTy5Xinhq7RrzOxqwjMbGzDB3R9r/G9TJDraAxGJ3ntAfzP7Y2TiO4BX3H2ku48g/O9pUGT9YsJzEb3Ld39R+9TDMyzPBc7Yw2v9mfDEiaMIzyR8VmT9lcC57j5K
4SFB0x6ISJTcvSwy/fnRwD1m9hrhKeZ/S/geDAcS3mMAmAcMIDwr685XA0+JPH4O7OkGSP2BP0WmPGlNeHI9gEuACWa2Hfi9uy9s8DcmEicFiEgMPHzh1JTIVPfPAKcRnr77Q+DlGEqNZs+fe3wO/G7Xi8DcfQZwkpkdDtxB7PclEWk0ChCRKJnZ+YSn2a8jvFfx78BBhCewK+W78xh9n0lmVg685+6vm1lH4GnCNx+rMrPRHr4b3vXAP8ysnvDNtK5198Vm9l7k9dOBvzTqNygSI12JLtJEIh+iX+zuXwbcikij0IfoIiISF+2BiIhIXLQHIiIicVGAiIhIXBQgIiISFwWIiIjERQEiIiJxUYCIiEhc/j+5pYj7SZ5KpgAAAABJRU5ErkJggg==\n"
     },
     "metadata": {
      "needs_background": "light"
     }
    },
    {
     "output_type": "execute_result",
     "data": {
      "text/plain": [
       "<matplotlib.axes._subplots.AxesSubplot at 0xf0545f8>"
      ]
     },
     "metadata": {},
     "execution_count": 13
    }
   ],
   "source": [
    "# Compute the word lengths of text1 once and reuse them for the preview and the distribution.\n",
    "word_lengths_1 = [len(w) for w in text1]\n",
    "print(\"文本中单词长度序列：\", word_lengths_1[:50])\n",
    "\n",
    "fdist_word_len_1 = FreqDist(word_lengths_1)\n",
    "show_title(\"查看文本中单词长度的分布\")\n",
    "# tabulate() prints the table itself and returns None, so it must not be wrapped in\n",
    "# print() (that only adds a stray 'None' line after the table).\n",
    "fdist_word_len_1.tabulate()\n",
    "fdist_word_len_1.plot()"
   ]
  },
  {
   "source": [
    "表1-2：NLTK 频率分布类中定义的常用函数\n",
    "\n",
    "| 例子                        | 描述                                          |\n",
    "| --------------------------- | --------------------------------------------- |\n",
    "| `fdist=FreqDist(samples)`     | 创建包含给定样本的频率分布                    |\n",
    "| `fdist['word']`              | 给定样本出现的次数                            |\n",
    "| `fdist.freq('word')`          | 给定样本出现的频率                            |\n",
    "| `fdist.N()`                   | 样本的总数                                    |\n",
    "| `fdist.keys()`                | 返回所有样本（NLTK 3 中不保证按频率排序）     |\n",
    "| `for word in fdist:`          | 遍历样本（NLTK 3 中不保证按频率排序，需按频率递减遍历时请使用 `most_common()`） |\n",
    "| `fdist.most_common(n)`        | 以频率递减顺序显示(样本，次数)元组链表        |\n",
    "| `fdist.max()`                 | 出现次数最多的样本                            |\n",
    "| `fdist.tabulate()`            | 输出频率分布表                                |\n",
    "| `fdist.plot()`                | 绘制频率分布图                                |\n",
    "| `fdist.plot(cumulative=True)` | 绘制累积频率分布图                            |\n",
    "| `fdist1 \\|= fdist2`          | 基于 fdist2 的统计数据更新 fdist1             |\n",
    "| `fdist1 < fdist2`             | 测试样本出现的频率 fdist1 中是否小于 fdist2中 |"
   ],
   "cell_type": "markdown",
   "metadata": {}
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "output_type": "stream",
     "name": "stdout",
     "text": [
      "文本中单词长度序列中的关键字： [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 20]\n文本中单词长度序列中的低频关键字(即这类单词长度的单词在文本中出现的较少)： [18, 20]\n文本中单词的个数： 260819 == 260819\n"
     ]
    }
   ],
   "source": [
    "# Distinct word lengths, the lengths returned by hapaxes(), and the total sample count\n",
    "# (which should equal the number of tokens in text1).\n",
    "length_keys = sorted(fdist_word_len_1.keys())\n",
    "rare_lengths = fdist_word_len_1.hapaxes()\n",
    "print(\"文本中单词长度序列中的关键字：\", length_keys)\n",
    "print(\"文本中单词长度序列中的低频关键字(即这类单词长度的单词在文本中出现的较少)：\", rare_lengths)\n",
    "print(\"文本中单词的个数：\", fdist_word_len_1.N(), \"==\", len(text1))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "output_type": "stream",
     "name": "stdout",
     "text": [
      "文本中 5 个出现频率最高的单词长度序列及其出现次数 = [(3, 50223), (1, 47933), (4, 42345), (2, 38513), (5, 26597)]\n文本中的样本总数=  260819\n文本中单词长度序列最多的单词出现次数=  3\n文本中单词长度为 3 的单词出现次数=  50223\n文本中单词长度为 3 的单词出现占比=  0.19255882431878046\n"
     ]
    }
   ],
   "source": [
    "print(\"文本中 5 个出现频率最高的单词长度序列及其出现次数 =\", fdist_word_len_1.most_common(5))\n",
    "print(\"文本中的样本总数= \", fdist_word_len_1.N())\n",
    "# max() returns the most frequent sample (here the word length 3), not a count,\n",
    "# so the label names the word length rather than a number of occurrences.\n",
    "print(\"文本中出现次数最多的单词长度= \", fdist_word_len_1.max())\n",
    "print(\"文本中单词长度为 3 的单词出现次数= \", fdist_word_len_1[3])\n",
    "print(\"文本中单词长度为 3 的单词出现占比= \", fdist_word_len_1.freq(3))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "output_type": "stream",
     "name": "stdout",
     "text": [
      "1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, "
     ]
    }
   ],
   "source": [
    "# Iterate over the samples in decreasing order of frequency.\n",
    "# most_common() yields (sample, count) pairs sorted by count; elements() would instead\n",
    "# repeat every sample once per occurrence, in first-seen order, across all tokens.\n",
    "for sample, _count in fdist_word_len_1.most_common(100):  # show at most 100 samples\n",
    "    print(sample, end=', ')"
   ]
  }
 ]
}